These analyses examine the relationship between expression levels and viral titer for a set of genes that we hypothesize have a relationship with viral infection. The overall strategy is to:

  1. Identify a list of candidate genes based on the literature and previous analyses.
  2. Subset our overall set of transcripts to include only those with an annotation containing the name of the candidate gene in the SwissProt gene name, SwissProt gene function, KEGG pathway, or EggNOG gene orthology annotations.
  3. Manually filter search results to remove non-target inclusions
  4. Remove transcripts with no variation in expression for each comparison (e.g., bursa at transcript-level)
  5. Perform linear regressions for all candidate transcripts, recording the p-value, adjusted p-value, R2, and beta coefficient (i.e., slope or effect size) for every test.
  6. Correct the p-values for multiple testing using a false discovery rate approach with an adjusted p-values (i.e., q-value) cutoff of 0.05.
  7. Plot the results

Note: These analyses include only control and infected birds sacrificed at 1 day post-infection

Functions for analyzing and plotting data

aprioriAnalysis <- function(target, targetTissue, targetLevel, ...) {
  result.tmp <- lcpm.all %>%
    filter(levelGT == targetLevel,
           identifier == target,
           tissue == targetTissue) %>%
    lm(lcpm ~ log.virus.sac, data = .) %>%
    tidy(.) %>%
    mutate(identifier = target) %>%
    filter(term == "log.virus.sac") %>%
    select(-term, -std.error, -statistic) %>%
    pivot_longer(cols = estimate:p.value,
                 names_to = "result.type",
                 values_to = "results.value")

  lcpm.all %>%
    filter(levelGT == targetLevel,
           identifier == target,
           tissue == targetTissue) %>%
    lm(lcpm ~ log.virus.sac, data = .) %>%
    glance(.) %>%
    mutate(identifier = target) %>%
    select(r.squared, adj.r.squared, identifier) %>%
    pivot_longer(cols = r.squared:adj.r.squared,
                 names_to = "result.type",
                 values_to = "results.value") %>%
    rbind(result.tmp)
}

aprioriPlotting <- function(target, targetTissue, targetLevel, ...) {
  ifelse(targetLevel == "gene", 
         statResults <- results.IG.tib %>%
           filter(identifier == target), 
         statResults <- results.IT.tib %>%
           filter(identifier == target))
  
  annotation <- annot.all %>%
    filter(transcript_id == target | gene_id == target) %>%
    filter(sprot_geneName_BlastX != ".") %>%
    select(sprot_geneName_BlastX) %>%
    slice(1)
  
  plot <- lcpm.all %>%
    filter(levelGT == targetLevel,
           identifier == target,
           tissue == targetTissue) %>%
    unite(birdLabel, bird, SSgroup.virus.sac, sep = "-") %>% 
    ggplot(aes(x = log.virus.sac, y = lcpm)) +
    geom_point() +
    geom_smooth(method = "lm") +
    geom_text_repel(aes(label = birdLabel), ) +
    ylab("Log2(Counts per million)") +
    xlab("Log10(Viral titer + 1)") +
    theme_classic() +
    labs(title=paste0(annotation[1], " - ", target),
         subtitle=paste0(targetTissue, " - ", targetLevel,
                         "; p = ", round(statResults[4,3], 5),
                         "; adj R2 = ", round(statResults[2,3], 3)))
  print(plot)
}

Run analysis loop

Script example - others hidden

#### Run analysis loop ####
targetTissue <- "Ileum" ## Bursa or Ileum
targetLevel <- "gene" ## gene or transcript
set <- lcpm.all %>%
  filter(levelGT == targetLevel, tissue == targetTissue) %>%
  group_by(identifier) %>%
  summarize(varLCPM = round(var(lcpm), 5)) %>%
  filter(varLCPM > 0)

results <- list()

for(z in unique(set$identifier)) {
  results[[length(results)+1]] <- aprioriAnalysis(z, targetTissue, targetLevel)
}

results.IG.tib <- bind_rows(results, .id = "column_label") %>%
  select(-column_label)

results.IG.tmp <- results.IG.tib %>%
  filter(result.type == "p.value") %>%
  mutate(adj.p.value = p.adjust(results.value, method='fdr', n = nrow(.))) %>%
  filter(adj.p.value < 0.1)

results.IG.sig <- results.IG.tib %>% filter(identifier %in% results.IG.tmp$identifier)

Table of results

Tissue Level Analyzed Significant
Bursa Transcript 1848 0
Bursa Gene 762 0
Ileum Transcript 1991 51
Ileum Gene 812 42

Distribution of p-values

Plotting significant results for ileum at gene level

Plotting significant results for ileum at transcript level